"""
@author: wangkang
@contact: wangkang@autobio.com.cn
@file: parse_json.py
@time: 2020/1/22 15:15
@desc: 解析doccano的标注数据
"""

import json
import os

# For now, only the first MAX_SAMPLES records of each export are converted.
MAX_SAMPLES = 400


def _export_sample(data_type, sample_no, record):
    """Write one annotated record as a raw-text file plus a label file.

    Two files are produced under ``data_origin/<data_type>/``:

    * ``<data_type>-<sample_no>.txtoriginal.txt`` holds the record's text.
    * ``<data_type>-<sample_no>.txt`` holds one line per label in the form
      ``entity<TAB>start<TAB>end<TAB>type``.

    Args:
        data_type: Dataset name; used as the output sub-directory and as the
            file-name prefix.
        sample_no: 1-based sample number used in the output file names.
        record: Decoded JSON object with a ``text`` string and a ``labels``
            list whose items unpack to ``(start, end, type)``.

    Raises:
        KeyError: If ``record`` lacks the ``text`` or ``labels`` key.
    """
    out_dir = "data_origin/%s" % data_type
    # Create the output directory on demand instead of failing on a first run.
    os.makedirs(out_dir, exist_ok=True)
    base = "%s/%s-%s" % (out_dir, data_type, sample_no)

    text = record['text']
    with open(base + ".txtoriginal.txt", "w", encoding='utf-8') as f:
        f.write(text)

    # "w" rather than "a+": re-running the script must replace the labels of
    # a sample, not append a duplicate copy of them.
    with open(base + ".txt", "w", encoding='utf-8') as f:
        for entry_start, entry_end, entry_type in record['labels']:
            # The entity surface form is the labelled slice of the text.
            entry_name = text[entry_start:entry_end]
            row = '\t'.join(
                map(str, (entry_name, entry_start, entry_end, entry_type))
            ) + '\n'
            print(row)
            f.write(row)


for data_type in ['上门服务', '问题原因']:
    with open('上门服务训练语料/%s.json' % data_type, 'r', encoding='utf8') as file:
        # Stream the export line by line; each line is parsed as one JSON object.
        for idx, line in enumerate(file):
            if idx >= MAX_SAMPLES:
                # Nothing beyond the cap is processed, so stop reading.
                break
            if not line.strip():
                # Skip blank lines; sample numbering still follows line position.
                continue
            _export_sample(data_type, idx + 1, json.loads(line))
